# Global knitr chunk defaults: no code tidying, no caching, 11 x 8 figures.
knitr::opts_chunk$set(
    tidy       = FALSE,
    cache      = FALSE,
    fig.height = 8,
    fig.width  = 11
)

library(tidyverse)
library(data.table)
library(dtplyr)

library(ggfortify)
library(plotly)


# Fix the RNG seed so the random draws made later in the script are repeatable.
set.seed(seed = 42)

# Wrap printed console output at 80 columns.
options(width = 80L)

1 Introduction to Time Series

Time series occur in almost any field of study that produces quantitative data. Whenever quantities are measured over time, those measurements form a time-series, or more formally, a discrete-time stochastic process.

One reasonably famous example of a time-series is the count of airline passengers in the US, as seen below. This is a fairly simple time-series, with measurements taken on a monthly basis over a number of years, with each datum consisting of a single number, i.e. this time-series is univariate.

# Load the AirPassengers dataset into the workspace and print its structure.
data("AirPassengers")

utils::str(AirPassengers)
##  Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...

Having quickly looked at the data, we now plot it.

# Quick look at the series using the base-graphics plot() generic.
plot(AirPassengers)

The above plot uses base R graphics, which are limited, so we will produce a similar plot using tools built on ggplot2.

We could build this from scratch, but the package ggfortify will help.

# ggplot2-based plot of the series; expand_limits() forces y = 0 into view.
autoplot(AirPassengers) +
    expand_limits(y = 0) +
    labs(x = "Year", y = "Count of Passengers ('000s)")

1.1 Overview

# Classical decomposition of the series; "additive" is decompose()'s default
# type, spelled out here to contrast with the multiplicative fit.
ap_decompose <- decompose(x = AirPassengers, type = "additive")

# Plot the components of the decomposition.
autoplot(ap_decompose)
## Warning: attributes are not identical across measure variables; they will be
## dropped
## Warning: Removed 24 rows containing missing values (geom_path).

Let’s try a multiplicative model.

# Same classical decomposition, but with a multiplicative model.
ap_decompose_mult <- decompose(x = AirPassengers, type = "multiplicative")

# Plot the components of the multiplicative decomposition.
autoplot(ap_decompose_mult)
## Warning: attributes are not identical across measure variables; they will be
## dropped
## Warning: Removed 24 rows containing missing values (geom_path).

# Seasonal-trend decomposition with stl(), using a periodic seasonal window.
ap_decompose_stl <- stl(x = AirPassengers, s.window = "periodic")

# Plot the components of the stl() fit.
autoplot(ap_decompose_stl)

2 Autocorrelation (Serial Correlation)

2.1 Air Passengers

# The raw series.
autoplot(AirPassengers) +
    labs(title = "Plot of the Air Passengers")

# The first differences of the series.
autoplot(diff(AirPassengers)) +
    labs(title = "Plot of the Diffs of the Air Passengers")

2.2 Air Passenger Correlogram

# Random component of ap_decompose with the NA entries removed.
ap_decomp_resid <- local({
    resid_all <- ap_decompose$random
    resid_all[!is.na(resid_all)]
})

# Autocorrelation of the residuals (computed without base plotting).
ap_acf <- acf(ap_decomp_resid, plot = FALSE)

autoplot(ap_acf) +
    labs(title = "Correlogram of the Air Passenger Residuals")

# Partial autocorrelation of the same residuals.
ap_pacf <- pacf(ap_decomp_resid, plot = FALSE)

autoplot(ap_pacf) +
    labs(title = "Partial Correlogram of the Air Passenger Residuals")

# Partial autocorrelation of the differenced raw series.
autoplot(pacf(diff(AirPassengers), plot = FALSE)) +
    labs(title = "Partial Correlogram of the Differenced Air Passenger Data")

3 ARMA and ARIMA Models

# 1000 standard-normal draws, passed as `innov` to the arima.sim() calls below.
innovations <- rnorm(n = 1000, mean = 0, sd = 1)

3.1 MA Models

# MA(1) series with coefficient 0.8, simulated from `innovations` (n = 100).
ma_1 <- arima.sim(model = list(ma = 0.8), n = 100, innov = innovations)

# First 100 innovations (default colour) against the MA(1) series (red).
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
        size = 0.5
    ) +
    geom_line(
        aes(x = seq_along(ma_1), y = as.numeric(ma_1)),
        colour = "red"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)

# MA(2) series with coefficients (0.4, 0.4), simulated from `innovations`.
ma_2 <- arima.sim(model = list(ma = c(0.4, 0.4)), n = 100, innov = innovations)

# First 100 innovations (default colour) against the MA(2) series (red).
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
        size = 0.5
    ) +
    geom_line(
        aes(x = seq_along(ma_2), y = as.numeric(ma_2)),
        colour = "red"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)

# Both MA series together: MA(1) in red, MA(2) in blue.
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
        size = 0.5
    ) +
    geom_line(
        aes(x = seq_along(ma_1), y = as.numeric(ma_1)),
        colour = "red"
    ) +
    geom_line(
        aes(x = seq_along(ma_2), y = as.numeric(ma_2)),
        colour = "blue"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)

3.2 AR Series

Now that we have created MA series, we look at what the AR series look like.

# AR(1) series with coefficient 0.8, simulated from `innovations` (n = 100).
ar_1 <- arima.sim(model = list(ar = 0.8), n = 100, innov = innovations)

# First 100 innovations (default colour) against the AR(1) series (red).
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
        size = 0.5
    ) +
    geom_line(
        aes(x = seq_along(ar_1), y = as.numeric(ar_1)),
        colour = "red"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)

# AR(2) series with coefficients (0.4, 0.4), simulated from `innovations`.
ar_2 <- arima.sim(model = list(ar = c(0.4, 0.4)), n = 100, innov = innovations)

# First 100 innovations (default colour) against the AR(2) series (red).
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
        size = 0.5
    ) +
    geom_line(
        aes(x = seq_along(ar_2), y = as.numeric(ar_2)),
        colour = "red"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)

# Both AR series together: AR(1) in red, AR(2) in blue.
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100]),
        size = 0.5
    ) +
    geom_line(
        aes(x = seq_along(ar_1), y = as.numeric(ar_1)),
        colour = "red"
    ) +
    geom_line(
        aes(x = seq_along(ar_2), y = as.numeric(ar_2)),
        colour = "blue"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)

3.3 ARMA Models

# ARMA(1,1) series with ar = 0.4 and ma = 0.4, simulated from `innovations`.
arma_1_1 <- arima.sim(
    model = list(ar = 0.4, ma = 0.4),
    n     = 100,
    innov = innovations
)

# First 100 innovations (default colour) with the AR(1) series in red, the
# MA(1) series in blue and the ARMA(1,1) series in green.
output_plot <- ggplot() +
    geom_line(
        aes(x = seq_along(innovations[1:100]), y = innovations[1:100])
    ) +
    geom_line(
        aes(x = seq_along(ar_1), y = as.numeric(ar_1)),
        colour = "red"
    ) +
    geom_line(
        aes(x = seq_along(ma_1), y = as.numeric(ma_1)),
        colour = "blue"
    ) +
    geom_line(
        aes(x = seq_along(arma_1_1), y = as.numeric(arma_1_1)),
        colour = "green"
    ) +
    labs(x = "Time Step", y = "Value")

ggplotly(output_plot)